#### Perform regressions to understand effect of prices

### Setup environment

setwd("~/research/coffee-dataverse/src")
do.origspec <- T
do.allyears <- T

source("load.R", encoding="iso-8859-1")

library(dplyr)

### Prepare data

df %>% group_by(Munic�pio) %>% summarize(nyear=length(year), dyear=diff(range(year)))

## Don't ensure every year-muni combination-- don't have weather, and long-diffs informative

df$portion.arabica <- get.portion.arabica(df)
df2 <- df %>% left_join(prices)
df2$price <- df2$price.arabica * df2$portion.arabica + df2$price.robusta * (1 - df2$portion.arabica)

df3 <- df2 %>% left_join(df2 %>% group_by(Munic�pio) %>%
                         summarize(year=year, price.l1=if (length(price) > 1) c(price[2:length(price)], NA) else NA * price,
                                   price.d1=if (length(price) > 1) c(NA, price[1:(length(price)-1)]) else NA * price,
                                   price.d2=if (length(price) > 2) c(NA, NA, price[1:(length(price)-2)]) else NA * price,
                                   price.d3=if (length(price) > 3) c(NA, NA, NA, price[1:(length(price)-3)]) else NA * price,
                                   price.d4=if (length(price) > 4) c(NA, NA, NA, NA, price[1:(length(price)-4)]) else NA * price,
                                   total.harvest.d1=if (length(total.harvest) > 1) c(NA, total.harvest[1:(length(total.harvest)-1)]) else NA * total.harvest,
                                   tmin.d1=if (length(tmin) > 1) c(NA, tmin[1:(length(tmin)-1)]) else NA * tmin,
                                   gdd1000.d1=if (length(gdd1000) > 1) c(NA, gdd1000[1:(length(gdd1000)-1)]) else NA * gdd1000,
                                   edd1000.d1=if (length(edd1000) > 1) c(NA, edd1000[1:(length(edd1000)-1)]) else NA * edd1000,
                                   prcp.d1=if (length(prcp) > 1) c(NA, prcp[1:(length(prcp)-1)]) else NA * prcp,
                                   prcp2.d1=if (length(prcp2) > 1) c(NA, prcp2[1:(length(prcp2)-1)]) else NA * prcp2),
                         by=c('Munic�pio', 'year'))

df3$logharvestdiff <- log(df3$total.harvest / df3$total.harvest.d1)
df3$logharvestdiff[!is.finite(df3$logharvestdiff)] <- NA
df3$logharvest <- log(df3$total.harvest)
df3$logharvest[!is.finite(df3$logharvest)] <- NA
df3$logharvest.d1 <- log(df3$total.harvest.d1)
df3$logharvest.d1[!is.finite(df3$logharvest.d1)] <- NA
df3$tmindiff <- df3$tmin - df3$tmin.d1
df3$gdd1000diff <- df3$gdd1000 - df3$gdd1000.d1
df3$edd1000diff <- df3$edd1000 - df3$edd1000.d1
df3$prcpdiff <- df3$prcp - df3$prcp.d1
df3$prcp2diff <- df3$prcp2 - df3$prcp2.d1

library(lfe)
library(splines)

df3$logpricediff.l1 <- log(df3$price.l1 / df3$price)
df3$logpricediff.d0 <- log(df3$price / df3$price.d1)
df3$logpricediff.d1 <- log(df3$price.d1 / df3$price.d2)
df3$logpricediff.d2 <- log(df3$price.d2 / df3$price.d3)
df3$logpricediff.d3 <- log(df3$price.d3 / df3$price.d4)

## Calculate baseline spec.

mod.base <- felm(as.formula(paste("logharvestdiff ~", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)

## Check for autocorrelation in the error term

calc.acf <- function(region, na.action, resid) {
    rdf <- data.frame(region)
    rdf$error <- NA
    rdf$error[-na.action] <- resid
    rdf2 <- rdf %>% group_by(region) %>% summarize(error0=error - mean(error, na.rm=T),
                                                   error1=lag(error) - mean(lag(error), na.rm=T),
                                                   error2=lag(error, 2) - mean(lag(error, 2), na.rm=T),
                                                   error3=lag(error, 3) - mean(lag(error, 3), na.rm=T),
                                                   error4=lag(error, 4) - mean(lag(error, 4), na.rm=T),
                                                   error5=lag(error, 5) - mean(lag(error, 5), na.rm=T),
                                                   error6=lag(error, 6) - mean(lag(error, 6), na.rm=T),
                                                   error7=lag(error, 7) - mean(lag(error, 7), na.rm=T),
                                                   error8=lag(error, 8) - mean(lag(error, 8), na.rm=T),
                                                   error9=lag(error, 9) - mean(lag(error, 9), na.rm=T),
                                                   error10=lag(error, 10) - mean(lag(error, 10), na.rm=T),
                                                   error11=lag(error, 11) - mean(lag(error, 11), na.rm=T),
                                                   error12=lag(error, 12) - mean(lag(error, 12), na.rm=T),
                                                   error13=lag(error, 13) - mean(lag(error, 13), na.rm=T),
                                                   error14=lag(error, 14) - mean(lag(error, 14), na.rm=T),
                                                   error15=lag(error, 15) - mean(lag(error, 15), na.rm=T),
                                                   error16=lag(error, 16) - mean(lag(error, 16), na.rm=T),
                                                   error17=lag(error, 17) - mean(lag(error, 17), na.rm=T),
                                                   error18=lag(error, 18) - mean(lag(error, 18), na.rm=T),
                                                   error19=lag(error, 19) - mean(lag(error, 19), na.rm=T),
                                                   error20=lag(error, 10) - mean(lag(error, 10), na.rm=T))

    pdf <- data.frame()
    for (delay in 0:20) {
        autocor <- sum(rdf2$error0 * rdf2[, paste0('error', delay)], na.rm=T) / sum(rdf2$error0^2 * !is.na(rdf2[, paste0('error', delay)]), na.rm=T)
        nobs <- sum(!is.na(rdf2$error0) * !is.na(rdf2[, paste0('error', delay)]))
        pdf <- rbind(pdf, data.frame(delay, autocor, nobs))
    }

    pdf
}

## Bootstrap CI

allvals <- matrix(NA, 0, 21)
for (ii in 1:1000) {
    print(ii)
    pdf <- calc.acf(df3$Munic�pio, mod.base$na.action, rnorm(length(mod.base$resid)))
    allvals <- rbind(allvals, pdf$autocor)
}

apply(allvals, 2, function(x) quantile(abs(x), .975))

cilimit <- quantile(abs(allvals[, -1]), .975) # 0.04409476

mod.all <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d0 + logpricediff.d1 + logpricediff.d2 + logpricediff.d3 + ", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)

## Display ACF graph

pdf <- rbind(cbind(model='Baseline model', calc.acf(df3$Munic�pio, mod.base$na.action, mod.base$resid)),
             cbind(model='All prices (t to t-3) model', calc.acf(df3$Munic�pio, mod.all$na.action, mod.all$resid)))

library(ggplot2)

pdf$model <- factor(pdf$model, levels=c('Baseline model', 'All prices (t to t-3) model'))
ggplot(pdf, aes(delay, autocor)) +
    facet_wrap( ~ model) + geom_col() +
    geom_hline(yintercept=cilimit, linetype='dashed') +
    geom_hline(yintercept=-cilimit, linetype='dashed') + theme_bw() +
    xlab("Autocorrelation delay (years)") + ylab("Autocorrelation")

cor(pdf$autocor[pdf$model == 'Baseline model'], pdf$autocor[pdf$model == 'All prices (t to t-3) model'])

## Main regressions

mod.d0 <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d0 + ", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)
mod.d1 <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d1 + ", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)
mod.d2 <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d2 + ", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)
mod.d3 <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d3 + ", gsub("Munic�pio +", "", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), fixed=T), "| 0 | Munic�pio + year")), data=df3, keepCX=T)

library(stargazer)

stargazer(list(mod.base, mod.all, mod.d0, mod.d1, mod.d2, mod.d3),
          add.lines=list(c('Municipality FE', rep('Yes', 6)),
                         c('Year FE', rep('Yes', 6)),
                         c('State Trends', rep('Cubic', 6))),
          dep.var.labels=rep("$\\Delta \\log \\nicefrac{h_t}{h_{t-1}}$", 6),
          covariate.labels=c("$\\Delta \\log \\xprice_{t}$",
                             "$\\Delta \\log \\xprice_{t-1}$",
                             "$\\Delta \\log \\xprice_{t-2}$",
                             "$\\Delta \\log \\xprice_{t-3}$",
                             paste0("$\\Delta \\text{", gsub("\\^2", "$^2$", weatherlabels), "}$")),
          omit=':', df=F, float=F)

source("conley.R")
cse.base <- ConleySEs(mod.base, df3[-mod.base$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
cse.all <- ConleySEs(mod.all, df3[-mod.all$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
cse.d0 <- ConleySEs(mod.d0, df3[-mod.d0$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
cse.d1 <- ConleySEs(mod.d1, df3[-mod.d1$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
cse.d2 <- ConleySEs(mod.d2, df3[-mod.d2$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)
cse.d3 <- ConleySEs(mod.d3, df3[-mod.d3$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)

## Generete autoreg prices

rhores <- data.frame()
for (rho in c(0, .1, .2, .3, .4, .5, .6)) {
    df3 <- df2 %>% left_join(df2 %>% group_by(Munic�pio) %>%
                             summarize(year=year, price.autoreg=stats::filter(price, rho, method='recursive', init=price[1] / (1 - rho)),
                                       price.d2=if (length(price) > 2) c(NA, NA, price.autoreg[1:(length(price)-2)]) else NA * price,
                                       price.d3=if (length(price) > 3) c(NA, NA, NA, price[1:(length(price)-3)]) else NA * price,
                                       total.harvest.d1=if (length(total.harvest) > 1) c(NA, total.harvest[1:(length(total.harvest)-1)]) else NA * total.harvest,
                                       tmin.d1=if (length(tmin) > 1) c(NA, tmin[1:(length(tmin)-1)]) else NA * tmin,
                                       gdd1000.d1=if (length(gdd1000) > 1) c(NA, gdd1000[1:(length(gdd1000)-1)]) else NA * gdd1000,
                                       edd1000.d1=if (length(edd1000) > 1) c(NA, edd1000[1:(length(edd1000)-1)]) else NA * edd1000,
                                       prcp.d1=if (length(prcp) > 1) c(NA, prcp[1:(length(prcp)-1)]) else NA * prcp,
                                       prcp2.d1=if (length(prcp2) > 1) c(NA, prcp2[1:(length(prcp2)-1)]) else NA * prcp2),
                             by=c('Munic�pio', 'year'))

    df3$logharvestdiff <- log(df3$total.harvest / df3$total.harvest.d1)
    df3$logharvestdiff[!is.finite(df3$logharvestdiff)] <- NA
    df3$logharvest <- log(df3$total.harvest)
    df3$logharvest[!is.finite(df3$logharvest)] <- NA
    df3$logharvest.d1 <- log(df3$total.harvest.d1)
    df3$logharvest.d1[!is.finite(df3$logharvest.d1)] <- NA
    df3$tmindiff <- df3$tmin - df3$tmin.d1
    df3$gdd1000diff <- df3$gdd1000 - df3$gdd1000.d1
    df3$edd1000diff <- df3$edd1000 - df3$edd1000.d1
    df3$prcpdiff <- df3$prcp - df3$prcp.d1
    df3$prcp2diff <- df3$prcp2 - df3$prcp2.d1

    df3$logpricediff.d2.autoreg <- log(df3$price.d2 / df3$price.d3)

    mod.d2n <- felm(as.formula(paste("logharvestdiff ~ logpricediff.d2.autoreg + ", gsub("(tmin|tmax|gdd1000|edd1000|prcp|prcp2)", "\\1diff", mainspec.rhs), "| 0 | Munic�pio + year")), data=df3, keepCX=T)

    rhores <- rbind(rhores, data.frame(rho, coeff=mod.d2n$coeff[1], se=mod.d2n$cse[1], rsqr=summary(mod.d2n)$P.r.squared))
}
rhores$tval <- rhores$coeff / rhores$se

library(xtable)
print(xtable(rhores, digits=7), include.rownames=F)

## Print the tables
## NOTE: To generate tables, Set rho to 0.3 and re-run code in loop above

stargazer(list(mod.base, mod.all, mod.d0, mod.d1, mod.d2, mod.d3, mod.d2n),
          add.lines=list(c('Year FE', rep('Yes', 7)),
                         c('State Trends', rep('Cubic', 7))),
          dep.var.labels=rep("$\\Delta \\log \\nicefrac{h_t}{h_{t-1}}$", 7),
          covariate.labels=c("$\\Delta \\log \\xprice_{t}$",
                             "$\\Delta \\log \\xprice_{t-1}$",
                             "$\\Delta \\log \\xprice_{t-2}$",
                             "$\\Delta \\log \\xprice_{t-3}$",
                             "$\\Delta \\log f(\\xprice_{t-2}, 0.3)$",
                             paste0("$\\Delta \\text{", gsub("\\^2", "$^2$", weatherlabels), "}$")),
          omit=':', df=F, float=F)

cse.d2n <- ConleySEs(mod.d2n, df3[-mod.d2n$na.action,], "Munic�pio", "year", "latitude", "longitude", dist_cutoff=400, lag_cutoff=1)

stargazer(list(mod.base, mod.all, mod.d0, mod.d1, mod.d2, mod.d3, mod.d2n),
          add.lines=list(c('Year FE', rep('Yes', 7)),
                         c('State Trends', rep('Cubic', 7))),
          dep.var.labels=rep("$\\Delta \\log \\nicefrac{h_t}{h_{t-1}}$", 7),
          covariate.labels=c("$\\Delta \\log \\xprice_{t}$",
                             "$\\Delta \\log \\xprice_{t-1}$",
                             "$\\Delta \\log \\xprice_{t-2}$",
                             "$\\Delta \\log \\xprice_{t-3}$",
                             "$\\Delta \\log f(\\xprice_{t-2}, 0.3)$",
                             paste0("$\\Delta \\text{", gsub("\\^2", "$^2$", weatherlabels), "}$")),
          omit=':', df=F, float=F,
          se=list(sqrt(diag(cse.base$Spatial_HAC)), sqrt(diag(cse.all$Spatial_HAC)), sqrt(diag(cse.d0$Spatial_HAC)), sqrt(diag(cse.d1$Spatial_HAC)), sqrt(diag(cse.d2$Spatial_HAC)), sqrt(diag(cse.d3$Spatial_HAC)), sqrt(diag(cse.d2n$Spatial_HAC))))
